#' Find country specific ratings
#'
#' \code{find_coders} subsets a wide rating matrix to the rows of one
#' country and to the columns of those coders who have coded that
#' country.
#'
#' @param m Wide formatted numeric matrix, one column per coder, with
#'     the country-dates as row names.
#' @param country_text_id Three letter ID of the target country. The
#'     matrix method matches it as a pattern against the row names.
#' @param missing Value denoting missing observations, used to tell
#'     apart coders who have coded the target country from those who
#'     have not. The matrix method defaults to \code{NA}.
#' @param lateral Whether to keep lateral coders. In the matrix method,
#'     \code{TRUE} (the default) keeps every coder with at least one
#'     non-missing rating for the country; \code{FALSE} keeps only
#'     coders with more than one.
#'
#' @details \code{find_coders} expects a wide formatted matrix typical
#'     of the V-Dem Measurement Model preparation process --- each
#'     coder is a separate column and the rownames contain the
#'     country-dates (concatenated \code{country_text_id} and
#'     \code{historical_date}).
#'
#' @return Subsetted numeric matrix
#'
#' @export
find_coders <- function(m, country_text_id, missing, lateral) {
    # S3 generic: dispatch on the class of `m`.
    UseMethod("find_coders")
}

#' @export
find_coders.matrix <- function(m, country_text_id, missing = NA, lateral = TRUE) {
    # Matrix method: keep the rows whose name matches `country_text_id`
    # and the coder columns that hold ratings for those rows.
    # `drop = FALSE` keeps the result a matrix even for a single row.
    submat <- m[grepl(country_text_id, rownames(m)), , drop = FALSE]

    # Decide per coder (column) whether it is kept.
    has_coded <- function(x) {
        # Number of cells holding a rating. With a non-NA `missing`
        # sentinel, NA cells are dropped from the count; otherwise a
        # single NA would turn the sum, and hence the column mask, into
        # NA.
        n_rated <- if (is.na(missing)) {
            sum(!is.na(x))
        } else {
            sum(x != missing, na.rm = TRUE)
        }

        # lateral = TRUE: one rating suffices; FALSE: require several.
        if (lateral) n_rated > 0 else n_rated > 1
    }

    keep <- apply(submat, 2, has_coded)

    # `sort_text_id` is defined elsewhere; presumably it yields the row
    # order for country-date names -- confirm against its definition.
    submat[sort_text_id(rownames(submat)), keep, drop = FALSE]
}

#' Check columns of a data.frame or matrix for strange values
#'
#' Walks through the object column by column and reports suspicious
#' content such as NA's, Dates out of range, empty strings etc.
#'
#' @param df A data.frame or a matrix object
#'
#' @return Called for the warnings it raises; the return value is not
#'     meant to be used.
#'
#' @examples
#' df <- data.frame(a = c(NA, 1, 2), b = c("", "A", NA),
#'                  stringsAsFactors = FALSE)
#' check_columns(df)
#'
#' @export
check_columns <- function(df) {
    # S3 generic: dispatch on the class of `df`.
    UseMethod("check_columns")
}

#' @export
check_columns.data.frame <- function(df) {
    if (!is.data.frame(df))
        stop("Object checked is no data.frame!")

    # Cell-level checks per column type. Every check must evaluate to a
    # logical; the element name is the label used in the warning text.
    earliest <- as.Date(c("17880101"), format = '%Y%m%d')

    checks_character <- list(
        "NA" = is.na,
        "''" = function(x) x == "",
        "\\n" = function(x) grepl("\n", x, fixed = TRUE),
        "\\r" = function(x) grepl("\r", x, fixed = TRUE),
        "leading or trailing white spaces" =
            function(x) trimws(x, which = "both") != x)
    checks_numeric <- list(
        "NA" = is.na,
        "NaN" = is.nan,
        "<0" = function(x) x < 0)
    checks_date <- list(
        "NA" = is.na,
        "> today" = function(x) x > Sys.Date(),
        "< 1788-01-01" = function(x) x < earliest)
    # The first factor check is always TRUE: any factor column is
    # reported because of its type alone.
    checks_factor <- list(
        "type!" = function(x) TRUE,
        "NA" = is.na)
    checks_other <- list("NA" = is.na)

    # Run every check on every column of `cols` and emit one warning
    # per check that fires, listing the offending columns.
    report <- function(cols, checks, type) {
        if (nrow(cols) == 0)
            return(NULL)

        results <- vector("list", length(checks))
        for (i in seq_along(checks)) {
            check <- checks[[i]]
            flagged <- vapply(cols, function(col) any(check(col)),
                              logical(1))
            # which() drops NA, so only definite hits are reported.
            offenders <- names(flagged)[which(flagged)]

            if (length(offenders) > 0) {
                msg <- sprintf("Column(s) %s have: %s %s",
                               paste(offenders, collapse = ", "),
                               type,
                               names(checks)[i])
                results[i] <- list(warn(msg))
            }
        }
        invisible(results)
    }

    # Column mask: TRUE where the column's class is exactly `cls`.
    has_class <- function(cls) {
        vapply(df, function(col) identical(class(col), cls), logical(1))
    }

    # Split the data.frame by column type; whatever is left is "other".
    df_char <- df[, has_class("character"), drop = FALSE]
    df_num <- df[, has_class("numeric"), drop = FALSE]
    df_date <- df[, has_class("Date"), drop = FALSE]
    df_factor <- df[, has_class("factor"), drop = FALSE]
    typed <- c(colnames(df_char), colnames(df_num),
               colnames(df_date), colnames(df_factor))
    df_other <- df[, !colnames(df) %in% typed, drop = FALSE]

    # Report type by type.
    report(df_num, checks_numeric, type = "numeric")
    report(df_char, checks_character, type = "character")
    report(df_date, checks_date, type = "Date")
    report(df_factor, checks_factor, type = "factor")
    report(df_other, checks_other, type = "Other")
}

#' @export
check_columns.matrix <- function(df) {
    # Convert the matrix to a data.frame (strings are not turned into
    # factors) and reuse the data.frame method.
    as_df <- as.data.frame(df, stringsAsFactors = FALSE)
    check_columns.data.frame(as_df)
}

#' Traverse coder-country network
#'
#' For a specific C variable and from a given start country,
#' \code{traverse} returns a \code{data.frame} of countries accessible
#' through bridge/lateral coding.
#'
#' @param x Long-formatted coder-level data with the columns
#'     \code{coder_id} and \code{country_id}.
#' @param root \code{country_id} of the start position.
#'
#' @details Ideally, for a particular C variable, every country (node)
#'     should be linked through bridge/lateral coding. Countries which
#'     do not appear in the output are isolated nodes. The start
#'     country itself is never part of the output.
#'
#' @return A \code{data.frame} with one row per reached country, in
#'     order of discovery: \code{country_id} and \code{count}, where
#'     \code{count} starts at 1 on discovery and grows by one each time
#'     the country is reached again through a coder link. Zero rows if
#'     nothing is reachable from \code{root}.
#'
#' @export
traverse <- function(x, root = 20) {
    # Visited countries with their hit counts; updated by `recurse`
    # through `<<-`.
    aux <- data.frame(country_id = numeric(0), count = numeric(0))

    # Depth-first descent: for each coder of `parent`, visit every other
    # country that coder has coded. Unseen countries are recorded and
    # descended into, already seen ones only get their counter bumped.
    recurse <- function(parent) {
        coders <- x$coder_id[x$country_id == parent]

        for (c_coder in coders) {
            countries <- x$country_id[x$coder_id == c_coder &
                                      x$country_id != parent]

            for (c_country in countries) {
                # The root is the start node and is not listed.
                if (c_country == root)
                    next

                seen <- aux$country_id == c_country
                if (any(seen)) {
                    aux$count[seen] <<- aux$count[seen] + 1
                } else {
                    aux <<- rbind(aux, data.frame(country_id = c_country,
                                                  count = 1))
                    recurse(c_country)
                }
            }
        }
    }

    # Actually start the traversal; merely naming `recurse` does nothing.
    recurse(root)
    aux
}
